Introduction

This notebook will show the index of association across generations at intervals of 1,000 generations. In order to do this, all simulated files were uploaded to the CGRB infastructure and were analyzed with analyze_and_save_ia.R with no permutations. These were then saved in a directory called ia_over_generations/. The data for these values is in a directory on the infastructure called ia_over_generations_data/, but is quite large (151GB).

Setup

library('zksimanalysis') # Main package for analysis
library('magrittr')      # For the %T>%
library('dplyr')         # Magic data manipulation
library('tidyr')         # more magic
library('purrr')         # magic dealing with lists
library('ggplot2')
library('gtable')
library('grid')
library('viridis')

Data retrieval

The data were downloaded from the CGRB server using rsync with the following command.

Note: replace XXXXX with the port number. I am not writing it here for security reasons.

rsync --update -tavz -e "ssh -p XXXXX" --exclude 'twenty_loci_results' \
kamvarz@files.cgrb.oregonstate.edu:/nfs1/BPP/Grunwald_Lab/home/kamvarz/simulation_analysis/results/ \
../simulation_results/

Preparing Data

First the data from the first round of simulations with 6-10 alleles per locus.

datfiles  <- dir(here::here("data", "ia_over_generations_n1000/"), full.names = TRUE)
locfiles  <- dir(here::here("data", "locus_diversity_over_generations_n1000/"), full.names = TRUE)
datnames  <- setNames(datfiles, datfiles)
locnames  <- setNames(locfiles, locfiles)
for (i in datfiles){
  datnames[i] <- load(i)
}
for (i in locfiles){
  locnames[i] <- load(i)
}

account_for_source <- . %>%
  map_df(get, .id = "source") %>% # map the `get()` function and return a tibble
  mutate(mutation_rate = ifelse(grepl("mutant", source), "even", "uneven")) %>%
  mutate(source = gsub("(^.+?feather).*$", "\\1.DATA.rda", source))

ex_run  <- "high_mutation([0-9]+?)/"
ex_seed <- "seed_([0-9]+?)_"
ex_sex  <- "sex_([0-9.]+?)_"
ex_gen  <- "gen_([0-9]+?)_"
ex_rep  <- "rep_([0-9]+?).pop"
ex_samp <- "_sam_([0-9]+?)$"

datalist <- datnames %>%
  setNames(datnames) %>%          # set the names for binding id
  account_for_source %>%
  tidyr::extract(pop, c("run", "seed", "sexrate", "gen", "rep", "sample"),
          paste0(ex_run, ex_seed, ex_sex, ex_gen, ex_rep, ex_samp),
          remove = FALSE) %>%
  mutate(sample = factor(sample, unique(sample))) # Transform the sample to a factor

datalist <- locnames %>%
  setNames(locnames) %>%          # set the names for binding id
  account_for_source %>%
  inner_join(datalist, by = c("pop", "source", "mutation_rate"))


rm(list = datnames)
rm(list = locnames)
gc()
datalist
datfiles  <- dir(here::here("data", "ia_over_generations_n1000_more_alleles/"), full.names = TRUE)
locfiles  <- dir(here::here("data", "locus_diversity_over_generations_n1000_more_alleles/"), full.names = TRUE)
datnames  <- setNames(datfiles, datfiles)
locnames  <- setNames(locfiles, locfiles)
for (i in datfiles){
  datnames[i] <- load(i)
}
for (i in locfiles){
  locnames[i] <- load(i)
}

account_for_source <- . %>%
  map_df(get, .id = "source") %>% # map the `get()` function and return a tibble
  mutate(mutation_rate = ifelse(grepl("mutant", source), "even", "uneven")) %>%
  mutate(source = gsub("(^.+?feather).*$", "\\1.DATA.rda", source))

ex_run  <- "more_alleles_high_mutation([0-9]+?)/"
ex_seed <- "seed_([0-9]+?)_"
ex_sex  <- "sex_([0-9.]+?)_"
ex_gen  <- "gen_([0-9]+?)_"
ex_rep  <- "rep_([0-9]+?).pop"
ex_samp <- "_sam_([0-9]+?)$"

datalist_ma <- datnames %>%
  setNames(datnames) %>%          # set the names for binding id
  account_for_source %>%
  tidyr::extract(pop, c("run", "seed", "sexrate", "gen", "rep", "sample"),
          paste0(ex_run, ex_seed, ex_sex, ex_gen, ex_rep, ex_samp),
          remove = FALSE) %>%
  mutate(sample = factor(sample, unique(sample))) # Transform the sample to a factor

datalist_ma <- locnames %>%
  setNames(locnames) %>%          # set the names for binding id
  account_for_source %>%
  inner_join(datalist_ma, by = c("pop", "source", "mutation_rate"))


rm(list = datnames)
rm(list = locnames)
gc()
datalist_ma

Plotting

Now that we have the data, we can visualize the stability of the simulations over generations.

old_theme <- theme_set(theme_bw(base_size = 16, base_family = "Helvetica"))
old_theme <- theme_update(axis.text.x = element_text(angle = 90, vjust = 0.5))
sample_colors <- scale_color_viridis(discrete = TRUE, direction = -1, option = "D")
iaplot <- datalist %>%
  mutate(gen = round(as.numeric(gen), -3)/1000) %>%
  mutate(runseed = paste(run, seed)) %>%
  group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
  summarize(rbarD = mean(rbarD, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = gen, y = rbarD, group = runseed, color = runseed)) +
  geom_line(alpha = 0.25) +
  facet_grid(mutation_rate ~ sexrate) +
  scale_x_continuous(breaks = c(2, 4, 6, 8)) +
  sample_colors +
  theme(text = element_text(size = 16)) +
  theme(strip.text = element_text(size = 16)) +
  theme(aspect.ratio = 0.75) +
  theme(panel.spacing = unit(0, "line")) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  theme(legend.position = "none") +
  labs(list(
    y = expression(bar(r)[d]),
    x = "generation (x1000)",
    title = "Index of association over 10,000 generations",
    subtitle = "6 to 10 alleles per locus",
    caption = "\nValues for 100 unique seed summarized over 10 replicates"
  ))
iaplot_ma <- datalist_ma %>%
  mutate(gen = round(as.numeric(gen), -3)/1000) %>%
  mutate(runseed = paste(run, seed)) %>%
  group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
  summarize(rbarD = mean(rbarD, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = gen, y = rbarD, group = runseed, color = runseed)) +
  geom_line(alpha = 0.25) +
  facet_grid(mutation_rate ~ sexrate) +
  scale_x_continuous(breaks = c(2, 4, 6, 8)) +
  sample_colors +
  theme(text = element_text(size = 16)) +
  theme(strip.text = element_text(size = 16)) +
  theme(aspect.ratio = 0.75) +
  theme(panel.spacing = unit(0, "line")) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  theme(legend.position = "none") +
  labs(list(
    y = expression(bar(r)[d]),
    x = "generation (x1000)",
    title = "Index of association over 10,000 generations",
    subtitle = "10 to 20 alleles per locus",
    caption = "\nValues for 100 unique seed summarized over 10 replicates"
  ))
iaplot

iaplot_ma

If we looked at allelic diversity statistics:

Hexpplot <- datalist %>%
  mutate(gen = round(as.numeric(gen), -3)/1000) %>%
  mutate(runseed = paste(run, seed)) %>%
  group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
  summarize(Hexp = mean(Hexp, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = gen, y = Hexp, group = runseed, color = runseed)) +
  geom_line(alpha = 0.25) +
  facet_grid(mutation_rate ~ sexrate) +
  scale_x_continuous(breaks = c(2, 4, 6, 8)) +
  sample_colors +
  theme(text = element_text(size = 16)) +
  theme(strip.text = element_text(size = 16)) +
  theme(aspect.ratio = 0.75) +
  theme(panel.spacing = unit(0, "line")) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  theme(legend.position = "none") +
  labs(list(
    y = expression(H[exp]),
    x = "generation (x1000)",
    title = "Nei's Gene Diversity over 10,000 generations",
    subtitle = "6 to 10 alleles per locus",
    caption = "\nValues for 100 unique seed summarized over 10 replicates"
  ))
Hexpplot

Hexpplot_ma <- datalist_ma %>%
  mutate(gen = round(as.numeric(gen), -3)/1000) %>%
  mutate(runseed = paste(run, seed)) %>%
  group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
  summarize(Hexp = mean(Hexp, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = gen, y = Hexp, group = runseed, color = runseed)) +
  geom_line(alpha = 0.25) +
  facet_grid(mutation_rate ~ sexrate) +
  scale_x_continuous(breaks = c(2, 4, 6, 8)) +
  sample_colors +
  theme(text = element_text(size = 16)) +
  theme(strip.text = element_text(size = 16)) +
  theme(aspect.ratio = 0.75) +
  theme(panel.spacing = unit(0, "line")) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  theme(legend.position = "none") +
  labs(list(
    y = expression(H[exp]),
    x = "generation (x1000)",
    title = "Nei's Gene Diversity over 10,000 generations",
    subtitle = "10 to 20 alleles per locus",
    caption = "\nValues for 100 unique seed summarized over 10 replicates"
  ))
Hexpplot_ma

Evenplot <- datalist %>%
  mutate(gen = round(as.numeric(gen), -3)/1000) %>%
  mutate(runseed = paste(run, seed)) %>%
  group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
  summarize(Evenness = mean(Evenness, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = gen, y = Evenness, group = runseed, color = runseed)) +
  geom_line(alpha = 0.25) +
  facet_grid(mutation_rate ~ sexrate) +
  scale_x_continuous(breaks = c(2, 4, 6, 8)) +
  sample_colors +
  theme(text = element_text(size = 16)) +
  theme(strip.text = element_text(size = 16)) +
  theme(aspect.ratio = 0.75) +
  theme(panel.spacing = unit(0, "line")) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  theme(legend.position = "none") +
  labs(list(
    y = expression(E[5][A]),
    x = "generation (x1000)",
    title = "Evenness over 10,000 generations",
    subtitle = "6 to 10 alleles per locus",
    caption = "\nValues for 100 unique seed summarized over 10 replicates"
  ))
Evenplot

Evenplot_ma <- datalist_ma %>%
  mutate(gen = round(as.numeric(gen), -3)/1000) %>%
  mutate(runseed = paste(run, seed)) %>%
  group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
  summarize(Evenness = mean(Evenness, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = gen, y = Evenness, group = runseed, color = runseed)) +
  geom_line(alpha = 0.25) +
  facet_grid(mutation_rate ~ sexrate) +
  scale_x_continuous(breaks = c(2, 4, 6, 8)) +
  sample_colors +
  theme(text = element_text(size = 16)) +
  theme(strip.text = element_text(size = 16)) +
  theme(aspect.ratio = 0.75) +
  theme(panel.spacing = unit(0, "line")) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  theme(legend.position = "none") +
  labs(list(
    y = expression(E[5][A]),
    x = "generation (x1000)",
    title = "Evenness over 10,000 generations",
    subtitle = "10 to 20 alleles per locus",
    caption = "\nValues for 100 unique seed summarized over 10 replicates"
  ))
Evenplot_ma

Alleleplot <- datalist %>%
  mutate(gen = round(as.numeric(gen), -3)/1000) %>%
  mutate(runseed = paste(run, seed)) %>%
  group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
  summarize(allele = mean(allele, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = gen, y = allele, group = runseed, color = runseed)) +
  geom_line(alpha = 0.25) +
  facet_grid(mutation_rate ~ sexrate) +
  scale_x_continuous(breaks = c(2, 4, 6, 8)) +
  sample_colors +
  theme(text = element_text(size = 16)) +
  theme(strip.text = element_text(size = 16)) +
  theme(aspect.ratio = 0.75) +
  theme(panel.spacing = unit(0, "line")) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  theme(legend.position = "none") +
  labs(list(
    y = "Mean number of alleles",
    x = "generation (x1000)",
    title = "Mean number of alleles over 10,000 generations",
    subtitle = "6 to 10 alleles per locus",
    caption = "\nValues for 100 unique seed summarized over 10 replicates"
  ))
Alleleplot

Alleleplot_ma <- datalist_ma %>%
  mutate(gen = round(as.numeric(gen), -3)/1000) %>%
  mutate(runseed = paste(run, seed)) %>%
  group_by(sexrate, gen, sample, mutation_rate, runseed) %>%
  summarize(allele = mean(allele, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = gen, y = allele, group = runseed, color = runseed)) +
  geom_line(alpha = 0.25) +
  facet_grid(mutation_rate ~ sexrate) +
  scale_x_continuous(breaks = c(2, 4, 6, 8)) +
  sample_colors +
  theme(text = element_text(size = 16)) +
  theme(strip.text = element_text(size = 16)) +
  theme(aspect.ratio = 0.75) +
  theme(panel.spacing = unit(0, "line")) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  theme(legend.position = "none") +
  labs(list(
    y = "Mean number of alleles",
    x = "generation (x1000)",
    title = "Mean number of alleles over 10,000 generations",
    subtitle = "10 to 20 alleles per locus",
    caption = "\nValues for 100 unique seed summarized over 10 replicates"
  ))
Alleleplot_ma

Session Information

options(width = 100)
devtools::session_info()
## Session info --------------------------------------------------------------------------------------
##  setting  value                       
##  version  R version 3.4.1 (2017-06-30)
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  tz       America/Chicago             
##  date     2017-08-26
## Packages ------------------------------------------------------------------------------------------
##  package       * version    date       source                            
##  ade4          * 1.7-8      2017-08-09 cran (@1.7-8)                     
##  adegenet      * 2.1.0      2017-07-17 local                             
##  ape             4.1        2017-02-14 CRAN (R 3.4.0)                    
##  assertthat      0.2.0      2017-04-11 CRAN (R 3.4.0)                    
##  backports       1.1.0      2017-05-22 CRAN (R 3.4.0)                    
##  base          * 3.4.1      2017-07-07 local                             
##  bindr           0.1        2016-11-13 CRAN (R 3.4.0)                    
##  bindrcpp      * 0.2        2017-06-17 CRAN (R 3.4.0)                    
##  bitops          1.0-6      2013-08-17 CRAN (R 3.4.0)                    
##  boot            1.3-20     2017-07-30 CRAN (R 3.4.1)                    
##  broom           0.4.2      2017-02-13 CRAN (R 3.4.0)                    
##  caTools       * 1.17.1     2014-09-10 CRAN (R 3.4.0)                    
##  cellranger      1.1.0      2016-07-27 CRAN (R 3.4.0)                    
##  cluster         2.0.6      2017-03-16 CRAN (R 3.4.0)                    
##  coda            0.19-1     2016-12-08 CRAN (R 3.4.0)                    
##  colorspace      1.3-2      2016-12-14 CRAN (R 3.4.0)                    
##  compiler        3.4.1      2017-07-07 local                             
##  datasets      * 3.4.1      2017-07-07 local                             
##  deldir          0.1-14     2017-04-22 CRAN (R 3.4.0)                    
##  devtools        1.13.3     2017-08-02 CRAN (R 3.4.1)                    
##  digest          0.6.12     2017-01-27 CRAN (R 3.4.0)                    
##  dplyr         * 0.7.2      2017-07-20 CRAN (R 3.4.1)                    
##  evaluate        0.10.1     2017-06-24 CRAN (R 3.4.1)                    
##  expm            0.999-2    2017-03-29 CRAN (R 3.4.0)                    
##  fastmatch       1.1-0      2017-01-28 CRAN (R 3.4.0)                    
##  feather       * 0.3.1      2016-11-09 CRAN (R 3.4.0)                    
##  flux          * 0.3-0      2014-04-25 CRAN (R 3.4.0)                    
##  forcats         0.2.0      2017-01-23 CRAN (R 3.4.0)                    
##  foreign         0.8-69     2017-06-21 CRAN (R 3.4.0)                    
##  gdata           2.18.0     2017-06-06 CRAN (R 3.4.0)                    
##  ggplot2       * 2.2.1      2016-12-30 CRAN (R 3.4.0)                    
##  glue            1.1.1      2017-06-21 CRAN (R 3.4.0)                    
##  gmodels         2.16.2     2015-07-22 CRAN (R 3.4.0)                    
##  graphics      * 3.4.1      2017-07-07 local                             
##  grDevices     * 3.4.1      2017-07-07 local                             
##  grid          * 3.4.1      2017-07-07 local                             
##  gridExtra       2.2.1      2016-02-29 CRAN (R 3.4.0)                    
##  gtable        * 0.2.0      2016-02-26 CRAN (R 3.4.0)                    
##  gtools          3.5.0      2015-05-29 CRAN (R 3.4.0)                    
##  haven           1.1.0      2017-07-09 CRAN (R 3.4.1)                    
##  here            0.1        2017-05-28 CRAN (R 3.4.0)                    
##  hms             0.3        2016-11-22 CRAN (R 3.4.0)                    
##  htmltools       0.3.6      2017-04-28 CRAN (R 3.4.0)                    
##  httpuv          1.3.5      2017-07-04 CRAN (R 3.4.1)                    
##  httr            1.2.1      2016-07-03 CRAN (R 3.4.0)                    
##  igraph          1.1.2      2017-07-21 cran (@1.1.2)                     
##  jsonlite        1.5        2017-06-01 CRAN (R 3.4.0)                    
##  knitr           1.16       2017-05-18 CRAN (R 3.4.0)                    
##  labeling        0.3        2014-08-23 CRAN (R 3.4.0)                    
##  lattice         0.20-35    2017-03-25 CRAN (R 3.4.0)                    
##  lazyeval        0.2.0      2016-06-12 CRAN (R 3.4.0)                    
##  LearnBayes      2.15       2014-05-29 CRAN (R 3.4.0)                    
##  lubridate       1.6.0      2016-09-13 CRAN (R 3.4.0)                    
##  magrittr      * 1.5        2014-11-22 CRAN (R 3.4.0)                    
##  MASS            7.3-47     2017-04-21 CRAN (R 3.4.0)                    
##  Matrix          1.2-10     2017-04-28 CRAN (R 3.4.0)                    
##  memoise         1.1.0      2017-04-21 CRAN (R 3.4.0)                    
##  methods       * 3.4.1      2017-07-07 local                             
##  mgcv            1.8-18     2017-07-28 CRAN (R 3.4.1)                    
##  mime            0.5        2016-07-07 CRAN (R 3.4.0)                    
##  mnormt          1.5-5      2016-10-15 CRAN (R 3.4.0)                    
##  modelr          0.1.1      2017-07-24 CRAN (R 3.4.1)                    
##  munsell         0.4.3      2016-02-13 CRAN (R 3.4.0)                    
##  nlme            3.1-131    2017-02-06 CRAN (R 3.4.0)                    
##  parallel        3.4.1      2017-07-07 local                             
##  pegas           0.10       2017-05-03 CRAN (R 3.4.0)                    
##  permute         0.9-4      2016-09-09 CRAN (R 3.4.0)                    
##  phangorn        2.2.0      2017-04-03 CRAN (R 3.4.0)                    
##  pinfsc50        1.1.0      2016-12-02 CRAN (R 3.4.0)                    
##  pkgconfig       2.0.1      2017-03-21 CRAN (R 3.4.0)                    
##  plyr            1.8.4      2016-06-08 CRAN (R 3.4.0)                    
##  poppr         * 2.4.1.99-2 2017-08-27 Github (grunwaldlab/poppr@cd4cba2)
##  poweRlaw      * 0.70.0     2016-12-22 CRAN (R 3.4.0)                    
##  psych           1.7.5      2017-05-03 CRAN (R 3.4.0)                    
##  purrr         * 0.2.3      2017-08-02 CRAN (R 3.4.1)                    
##  quadprog        1.5-5      2013-04-17 CRAN (R 3.4.0)                    
##  R6              2.2.2      2017-06-17 cran (@2.2.2)                     
##  Rcpp            0.12.12    2017-07-15 cran (@0.12.12)                   
##  readr         * 1.1.1      2017-05-16 CRAN (R 3.4.0)                    
##  readxl          1.0.0      2017-04-18 CRAN (R 3.4.0)                    
##  reshape2        1.4.2      2016-10-22 CRAN (R 3.4.0)                    
##  rlang           0.1.1      2017-05-18 CRAN (R 3.4.0)                    
##  rmarkdown       1.6        2017-06-15 cran (@1.6)                       
##  rprojroot       1.2        2017-01-16 CRAN (R 3.4.0)                    
##  rvest           0.3.2      2016-06-17 CRAN (R 3.4.0)                    
##  scales          0.4.1.9002 2017-08-02 Github (hadley/scales@842ad87)    
##  seqinr          3.4-5      2017-08-01 CRAN (R 3.4.1)                    
##  shiny           1.0.5      2017-08-23 cran (@1.0.5)                     
##  sp              1.2-5      2017-06-29 CRAN (R 3.4.1)                    
##  spdep           0.6-13     2017-04-25 CRAN (R 3.4.0)                    
##  splines         3.4.1      2017-07-07 local                             
##  stats         * 3.4.1      2017-07-07 local                             
##  stats4          3.4.1      2017-07-07 local                             
##  stringi         1.1.5      2017-04-07 CRAN (R 3.4.0)                    
##  stringr         1.2.0      2017-02-18 CRAN (R 3.4.0)                    
##  tibble        * 1.3.3      2017-05-28 CRAN (R 3.4.0)                    
##  tidyr         * 0.6.3      2017-05-15 CRAN (R 3.4.0)                    
##  tidyverse     * 1.1.1      2017-01-27 CRAN (R 3.4.0)                    
##  tools           3.4.1      2017-07-07 local                             
##  utils         * 3.4.1      2017-07-07 local                             
##  vcfR          * 1.5.0      2017-05-18 cran (@1.5.0)                     
##  vegan           2.4-4      2017-08-24 cran (@2.4-4)                     
##  VGAM            1.0-4      2017-07-25 CRAN (R 3.4.1)                    
##  viridis       * 0.4.0      2017-03-27 CRAN (R 3.4.0)                    
##  viridisLite   * 0.2.0      2017-03-24 CRAN (R 3.4.0)                    
##  withr           2.0.0      2017-07-28 CRAN (R 3.4.1)                    
##  xml2            1.1.1      2017-01-24 CRAN (R 3.4.0)                    
##  xtable          1.8-2      2016-02-05 CRAN (R 3.4.0)                    
##  yaml            2.1.14     2016-11-12 CRAN (R 3.4.0)                    
##  zksimanalysis * 0.9.0.9000 2017-08-26 local